Continent-wide declines in reef species over a decade of ocean warming

Freddie J. Heather

Last compiled on: 2022-07-13

About this script

The following document provides the R code for the analysis for the paper: Edgar et al. (In review.) Continent-wide trends in reef populations over a decade of ocean warming.

R-version 4.2.0 (2022-04-22)
platform x86_64-w64-mingw32
Article DOI
Article link
Article citation Edgar et al. (In review.) Continent-wide trends in reef populations over a decade of ocean warming.
Time series 1992-2021
Geographical scale Continental Australia
Code author contact

Set-up

Required R packages

Loading the required packages for this analysis.

library(tidyverse)
library(janitor)
library(zoo, include.only = "na.approx")
library(magick)
library(cowplot)
library(scales)
library(patchwork)
library(ggpubr, include.only = "as_ggplot")
library(ragg)
library(gt)
library(gtable)
library(grid)
library(nlme)
library(here)
library(kableExtra)

# "not in" function
`%!in%` <- Negate(`%in%`)

Reading in raw data

The raw data is in a wide format, with the site ID (site_code), survey method (method), species identity (species_name), and the standard density per 500 \(m^2\) surveyed is stored within the year columns (e.g. 1992, 1993, etc.).

# main dataframe of species density
data_raw_wide <- 
  read_csv(file = here("input", "data", "count_data.csv"), 
           show_col_types = FALSE, 
           skip_empty_rows = TRUE) 

# species-level information
species_tbl <- 
  read_csv(file =  here("input", "data", "species_info.csv"), 
           show_col_types = FALSE, 
           skip_empty_rows = TRUE) 

# temperature timeseries (post-2008) data for each site
temperature_raw <- 
  read_csv(file =  here("input", "data", "temperature_timeseries.csv"),
           show_col_types = FALSE, 
           skip_empty_rows = TRUE)  

Let’s look at the first six rows of the raw data (in wide format).

data_raw_wide |> head()
## # A tibble: 6 × 41
##   site_code site_id       site_name state  data  method latitude longitude taxon
##   <chr>     <chr>         <chr>     <chr>  <chr> <chr>     <dbl>     <dbl> <chr>
## 1 19131S-1  19131S-1_AIMS 19131S-1  North… AIMS  AIMS      -19.8      149. Vert…
## 2 19131S-1  19131S-1_AIMS 19131S-1  North… AIMS  AIMS      -19.8      149. Vert…
## 3 19131S-1  19131S-1_AIMS 19131S-1  North… AIMS  AIMS      -19.8      149. Vert…
## 4 19131S-1  19131S-1_AIMS 19131S-1  North… AIMS  AIMS      -19.8      149. Vert…
## 5 19131S-1  19131S-1_AIMS 19131S-1  North… AIMS  AIMS      -19.8      149. Vert…
## 6 19131S-1  19131S-1_AIMS 19131S-1  North… AIMS  AIMS      -19.8      149. Vert…
## # … with 32 more variables: biogeog <chr>, species_name <chr>, x1992 <dbl>,
## #   x1993 <dbl>, x1994 <dbl>, x1995 <dbl>, x1996 <dbl>, x1997 <dbl>,
## #   x1998 <dbl>, x1999 <dbl>, x2000 <dbl>, x2001 <dbl>, x2002 <dbl>,
## #   x2003 <dbl>, x2004 <dbl>, x2005 <dbl>, x2006 <dbl>, x2007 <dbl>,
## #   x2008 <dbl>, x2009 <dbl>, x2010 <dbl>, x2011 <dbl>, x2012 <dbl>,
## #   x2013 <dbl>, x2014 <dbl>, x2015 <dbl>, x2016 <dbl>, x2017 <dbl>,
## #   x2018 <dbl>, x2019 <dbl>, x2020 <dbl>, x2021 <dbl>

Data cleaning

Convert to long-format

For the analysis, we want the data in a long (aka. tidy) format - a single row per observation. So we move the “year” columns to a single column (year) and add a column which we will call count_raw, which refers to the count for that species at that site at that year.

We also change any zero-count values to NA for Macroalgae species, as macroalgae were not surveyed every year. Where appropriate these will then be changed back to zero’s in the next step.

# all year columns start with "x"
data_raw_long <-
  data_raw_wide |> 
  pivot_longer(
    cols = starts_with("x"),
    values_to = "count_raw",
    names_to = "year", 
    names_prefix = "x",
    names_transform = list(year = as.integer)
  ) |> 
  # all zero's become NA
  mutate(count_raw = case_when(count_raw == 0 & taxon == "Macroalgae" ~ NA_real_,
                                TRUE ~ count_raw))

Convert NA values to zero

We want NA values to be changed to zeros when all of the following conditions are met:

  1. The site was surveyed that year.
  2. The species was observed at least once at that site in any other year.
  3. The taxon (Macroalgae) was recorded at that site at that year.
  4. The current count is NA.

To do this we need to make a list of all the site*year combinations, all site*species combinations, and all site*year*taxon combinations that have at least one observation. If the row in question has a site*year combination that is found in the overall site*year combination list, then we know that site was surveyed in that year and therefore meets the first condition. We repeat for the first three conditions, if all three are met and the count is NA, we replace with a zero value.

# A list of all the site*year combinations
site_byyear <-
  data_raw_long |> 
  filter(!(is.na(count_raw)|count_raw == 0)) |> # not recorded
  select(site_id, year) |> 
  distinct() |> 
  mutate(site_id_yr = paste(site_id, year, sep = "_"))

# A list of all the site*year*species combinations
site_byspp <-
  data_raw_long |> 
  filter(!(is.na(count_raw)|count_raw == 0)) |> # not recorded
  select(site_id, species_name) |> 
  distinct() |> 
  mutate(site_id_spp = paste(site_id, species_name, sep = "_"))


# A list of all the site*year*taxon combinations
# note: some surveys surveyed only a given taxa in a given year at a site 
# (e.g. they may not have surveyed Macroalgae at site X in year Y, therefore 
# the counts for Macroalgae at this site*year should be NA and not zero)
# true example: JMP-S1 did not survey macroalgae in 2008 and 2015 
site_byyr_bytaxon <-
  data_raw_long |> 
  filter(!(is.na(count_raw)|count_raw == 0)) |> # not recorded
  # filter(!((is.na(count_raw)|count_raw == 0) & taxon != "Coral")) |> # not recorded
  select(site_id, year, taxon) |> 
  distinct() |> 
  mutate(site_id_year_taxon = paste(site_id, year, taxon, sep = "_"))


# changing NA counts to zero counts, where appropriate 
# (i.e. meets the criteria above)
data_addingzeros <- 
  data_raw_long |> 
  mutate(site_id_yr = paste(site_id, year, sep = "_"), 
         site_id_spp = paste(site_id, species_name, sep = "_"), 
         site_id_year_taxon = paste(site_id, year, taxon, sep = "_"), 
         spp_at_site = site_id_spp %in% site_byspp$site_id_spp, 
         was_surveyed = site_id_yr %in% site_byyear$site_id_yr, 
         surveyed_taxon_that_year = 
           site_id_year_taxon %in% site_byyr_bytaxon$site_id_year_taxon, 
         na_count = is.na(count_raw)
  ) |> 
  mutate(count_raw = case_when(
    na_count & 
      was_surveyed & 
      spp_at_site & 
      surveyed_taxon_that_year ~ 0,
    TRUE ~ count_raw
  )) |> 
  select(names(data_raw_long))

Final clean

A few final clean-ups: rounding the latitude and longitude to 1x1 gricells, and creating a variable called transect_id, which is the site_id and the method, which represents a single transect (\(500 m^2\) or \(100 m^2\) depending on the method) performed.

# this will be used as the main dataframe
data_clean <- 
  data_addingzeros |> 
  ungroup() |> 
  mutate(latitude  = round(latitude),
         longitude = round(longitude)) |> 
  mutate(transect_id = paste(site_id, method, sep = "_"))

Let’s have a look at the first six rows of the cleaned data.

data_clean |> head()
## # A tibble: 6 × 14
##   site_code site_id       site_name state  data  method latitude longitude taxon
##   <chr>     <chr>         <chr>     <chr>  <chr> <chr>     <dbl>     <dbl> <chr>
## 1 19131S-1  19131S-1_AIMS 19131S-1  North… AIMS  AIMS        -20       149 Vert…
## 2 19131S-1  19131S-1_AIMS 19131S-1  North… AIMS  AIMS        -20       149 Vert…
## 3 19131S-1  19131S-1_AIMS 19131S-1  North… AIMS  AIMS        -20       149 Vert…
## 4 19131S-1  19131S-1_AIMS 19131S-1  North… AIMS  AIMS        -20       149 Vert…
## 5 19131S-1  19131S-1_AIMS 19131S-1  North… AIMS  AIMS        -20       149 Vert…
## 6 19131S-1  19131S-1_AIMS 19131S-1  North… AIMS  AIMS        -20       149 Vert…
## # … with 5 more variables: biogeog <chr>, species_name <chr>, year <int>,
## #   count_raw <dbl>, transect_id <chr>
data_clean |> 
  pull(site_code) |> 
  unique() |> 
  length()
## [1] 1636
data_clean |> 
  select(site_code, data) |> 
  distinct() |> 
  nrow()
## [1] 1740
data_clean |> 
  select(site_code, data) |> 
  distinct() |> 
  count(data)
## # A tibble: 3 × 2
##   data      n
##   <chr> <int>
## 1 AIMS    279
## 2 ATRC    356
## 3 RLS    1105
data_clean |> 
  select(site_code, data2 = data) |> 
  distinct() |> 
  mutate(n = 1) |> 
  pivot_wider(names_from = c(data2), values_from  = n) |> 
  rowwise() %>%
  mutate(sum = sum(AIMS,ATRC, RLS, na.rm = T)) |> 
  filter(sum != 1) 
## # A tibble: 104 × 5
## # Rowwise: 
##    site_code  AIMS  ATRC   RLS   sum
##    <chr>     <dbl> <dbl> <dbl> <dbl>
##  1 BMP-S10      NA     1     1     2
##  2 BMP-S11      NA     1     1     2
##  3 BMP-S13      NA     1     1     2
##  4 BMP-S14      NA     1     1     2
##  5 BMP-S15      NA     1     1     2
##  6 BMP-S16      NA     1     1     2
##  7 BMP-S17      NA     1     1     2
##  8 BMP-S18      NA     1     1     2
##  9 BMP-S19      NA     1     1     2
## 10 BMP-S20      NA     1     1     2
## # … with 94 more rows

Checking the number of years we have in the data?

data_clean |> pull(year) |> range()
## [1] 1992 2021

Data wrangling

Orgainising data

Each row in the wide format corresponds to a population trend, each row has a unique site/data-source/method as well as unique species. Each species corresponds to a taxon (e.g., Vertebrate or Coral) and a biogeography (e.g., cool or warm). Each site corresponds to a specific state (e.g., Northeast or Northwest). We want to create two ID tables to be used for location-based information and species based information.

  1. Species-level information (already exists; called species_tbl)
  2. Location level information (transect-level; now called transect_tbl)

If we need to access the transect-level or site-level information we can left_join() the ID tables to the basic dataframe (data_basic). Here I use the term “transect” to refer to the site/source/method combination.

# unique identifier of the transect (site_code, data, and method)
transect_tbl <-
  data_clean |> 
  select(transect_id, site_code, state, data, method, latitude, longitude) |> 
  distinct()

# only the columns we need
data_basic <- 
  data_clean |> 
  select(transect_id, species_name, year, count_raw)

Let’s check out the first six rows of the basic data frame.

data_basic |> head()
## # A tibble: 6 × 4
##   transect_id        species_name                 year count_raw
##   <chr>              <chr>                       <int>     <dbl>
## 1 19131S-1_AIMS_AIMS Acanthochromis polyacanthus  1992        NA
## 2 19131S-1_AIMS_AIMS Acanthochromis polyacanthus  1993        NA
## 3 19131S-1_AIMS_AIMS Acanthochromis polyacanthus  1994        NA
## 4 19131S-1_AIMS_AIMS Acanthochromis polyacanthus  1995         8
## 5 19131S-1_AIMS_AIMS Acanthochromis polyacanthus  1996         8
## 6 19131S-1_AIMS_AIMS Acanthochromis polyacanthus  1997         4

Wrangling temeprature data

To get the temeprature values for each state (e.g. “Northeast”, “Southwest” etc.), we get the mean temeprature (per year) for each 1x1 degree grid cells, and then the mean temperature for each state (by year).

We then want to standardise the state temperature values relative to the values of 2008.

# state-level temperature values
state_sst <- 
  data_raw_wide |> 
  select(site_name, state, latitude, longitude) |> 
  mutate(latitude = round(latitude),
         longitude = round(longitude)) |> 
  distinct() |> 
  left_join(temperature_raw, by = "site_name") |> 
  drop_na() |>
  group_by(state, latitude, longitude, year) |> 
  summarise(mean_sst = mean(sst), 
            .groups = "drop") |> 
  group_by(state, year) |> 
  summarise(mean_sst = mean(mean_sst), 
            .groups = "drop")

# temperature data for 2008 for each state
sst_2008 <- 
  state_sst |> 
  filter(year == 2008) |> 
  select(state, 
         sst_2008 = mean_sst)

# temeperature values relative to 2008
state_sst_std <- 
  state_sst |> 
  left_join(sst_2008) |> 
  mutate(year = as.numeric(year)) |> 
  mutate(sst_std = mean_sst - sst_2008) 

Predicting decadal change

frequency_tbl <-
  data_clean |> 
  select(site_id, species_name) |> 
  distinct() |> 
  count(species_name, 
        name = "n_at_allsites")

# NA values come from the rho/pval/sig columns from final_output
allspp <- 
  final_output |> 
  left_join(species_tbl) |>
  left_join(frequency_tbl) |> 
  ungroup()
# function to fit the gls
fit_gls <- function(biogeography, taxon_group){
  
  mod <- 
    allspp |> 
    filter(biogeog == biogeography,
           taxon   == taxon_group) |> 
    gls(model =  
          log(decade_change) ~
          scale(log(max_length)) +
          scale(log(max_depth)) +
          scale(lat_span_degrees) +
          scale(log(n_at_allsites)) +
          scale(t_mean) +
          scale(log(chl_mean)),
        weights = varIdent(form=~1|class)
    )
  
  confint_mod <-
    mod |> 
    confint(level  = 0.95) |> 
    as_tibble(rownames = "term") |> 
    clean_names()
  
  
  mod |> 
    summary() |>
    {\(x) x$tTable}() |>
    as_tibble(rownames = "term") |>
    left_join(confint_mod, by = "term")
  
}

# cleaning the names of the model terms
label_tbl <- 
  tibble(
    term = c(
      "scale(log(max_length))",
      "scale(log(max_depth))",
      "scale(lat_span_degrees)",
      "scale(log(n_at_allsites))",
      "scale(t_mean)",
      "scale(log(chl_mean))",
      "(Intercept)"
    ),
    axis_title = c( "Max. Length",
                    "Max. Depth",
                    "Lat. Span",
                    "Frequency",
                    "Temperature",
                    "Chlorophyll",
                    "Intercept")) |> 
  rownames_to_column()

gls_output <- 
  expand_grid(biogeog = c("tropical",
                          "warm",
                          "cool"),
              taxon = c("Vertebrate",
                        "Invertebrate",
                        "Coral",
                        "Macroalgae")
  ) |> 
  filter(!(taxon == "Macroalgae" & biogeog == "tropical"),
         !(taxon == "Coral" & biogeog != "tropical")) |> 
  mutate(label = paste0(LETTERS[1:9], ". ", taxon)) |> 
  mutate(model_out = map2(.x = biogeog, 
                          .y = taxon, 
                          .f = ~fit_gls(biogeography = .x, 
                                        taxon_group = .y))) |> 
  unnest(cols = c(model_out)) |> 
  left_join(label_tbl, by = "term") |> 
  mutate(axis_title = fct_reorder(axis_title, desc(rowname)))  |>
  clean_names() |>
  mutate(biogeog = factor(biogeog, 
                          levels = c("tropical",
                                     "warm",
                                     "cool"),
                          labels = c("Tropical",
                                     "Warm",
                                     "Cool"))) |>
  mutate(taxon = factor(taxon, levels = c("Vertebrate",
                                          "Invertebrate",
                                          "Coral",
                                          "Macroalgae"))) |> 
  left_join(as_tibble(biogeog_cols, 
                      rownames = "biogeog") |> 
              rename(point_col = value), by = "biogeog") |> 
  mutate(bkgrd = if_else(value < 0, grey_bkgrnd_col, "white"),
         fill_col = if_else(p_value < 0.05, point_col, bkgrd))
fig05 <- 
  gls_output |> 
  ggplot() +
  aes(x = value, 
      y = axis_title,  
      col = biogeog, 
      label = label) +
  geom_rect(aes(xmin = -Inf,
                xmax = 0,
                ymin = -Inf,
                ymax = Inf),
            fill = grey_bkgrnd_col, 
            col  = grey_bkgrnd_col) +
  geom_vline(xintercept = 0, lty = 2) +
  geom_segment(aes(x = x2_5_percent, 
                   xend = x97_5_percent, 
                   yend = axis_title),
               size = 1.5, 
               show.legend = FALSE) +
  geom_point(size = 4, 
             pch = 21,
             stroke = 1.5, 
             aes(fill = fill_col)) +
  scale_fill_identity("fill_col") +
  scale_colour_manual("biogeog", 
                      values = biogeog_cols,
                      guide = guide_legend(
                        override.aes = list(pch = 19) )) +
  facet_wrap(label~., 
             scales = "free_x") +
  guides(override.aes = list(pch = 19)) + 
  theme_minimal() +
  theme(
    strip.text.x =  element_text(face = "bold", hjust = 0.5),
    axis.title = element_blank(),
    strip.background = element_blank(),
    strip.placement = "none",
    axis.text = element_text(size = 12),
    strip.text = element_text(size = 12),
    panel.grid = element_blank(),
    panel.spacing = unit(1, "lines"),
    legend.position = "bottom",
    legend.title = element_blank(),
    axis.text.y.right = element_text(colour = "black", 
                                     hjust = 0.95),
    axis.line = element_line(size = 1),
    axis.ticks = element_line(size = 1),
    plot.margin = margin(r = 10, b = 5,  t = 5),
    legend.margin = margin(t=-0,l=0,b=0,r=0.2, unit='cm'),
    legend.background = element_rect(size = 0.5))

Let’s have a look at the figure

here("output", "figs", "fig05.png") |> knitr::include_graphics()

Useful statistics

Count for each taxonomic class

species_tbl |> 
  count(class) |>
  rename(Class = class,
         `Number of species` = n) |> 
  kable() |> 
  kable_styling("striped", full_width = F, position = "center") 
Class Number of species
Actinopterygii 705
Anthozoa 55
Asteroidea 27
Bivalvia 5
Brown algae 48
Cephalopoda 4
Chondrichthyes 19
Crinoidea 7
Echinoidea 20
Gastropoda 55
Green algae 15
Holothuroidea 11
Malacostraca 13
Red algae 71
Reptilia 2
cool_echinoderms_species <- 
  species_tbl |> 
  filter(class %in% c("Holothuroidea", 
                      "Crinoidea",
                      "Asteroidea",
                      "Echinoidea")) |>
  filter(biogeog == "cool") |> 
  pull(species_name)

warm_echinoids_species <- 
  species_tbl |> 
  filter(class %in% c("Echinoidea")) |> 
  filter(biogeog == "warm") |> 
  pull(species_name)

cool_echinoderms_data <-
  final_output |> 
  left_join(species_tbl) |> 
  filter(species_name %in% cool_echinoderms_species) |> 
  mutate(taxon = "cool_echinoderms") 

warm_echinoids_data <-
  final_output |> 
  left_join(species_tbl) |> 
  filter(species_name %in% warm_echinoids_species) |> 
  mutate(taxon = "warm_echonoids") 

coral_spp_data <- 
  final_output |> 
  filter(taxon == "Coral") |> 
  mutate(taxon = "all_corals") 

specific_coral <- 
  final_output |> 
  filter(species_name == "Astreopora myriophthalma") |> 
  mutate(taxon = "A_myriophthalma") 

sig_coral_output <-
  final_output |> 
  filter(taxon == "Coral", pval < 0.05)

all_macroalgae_data <- 
  final_output |> 
  filter(taxon == "Macroalgae") |> 
  mutate(taxon = "all_macroalgae") 

extra_data <- 
  cool_echinoderms_data |> 
  bind_rows(warm_echinoids_data) |> 
  bind_rows(coral_spp_data) |> 
  bind_rows(specific_coral) |> 
  bind_rows(all_macroalgae_data) |> 
  group_by(taxon, biogeog) |> 
  summarise(y = mean(slope, na.rm = TRUE),
            sd = sd(slope, na.rm = TRUE), 
            n = n(),
            .groups = "drop") |> 
  mutate(ci = qnorm(.95)*(sd/sqrt(n))) |> 
  mutate(uppr = y + ci,
         lwr = y - ci) |> 
  mutate(decade_change = exp(y*10),
         decade_change_lwr = exp(lwr*10),
         decade_change_uppr = exp(uppr*10)) 

dec_places <- function(x, n = 2) format(round(x, n),  nsmall = n) |> as.numeric()

fig04_data_separated |> 
  select(taxon, decade_change,biogeog, n, decade_change_lwr, decade_change_uppr) |> 
  filter(taxon != "All species") |> 
  bind_rows(fig04_data_allspp |> 
              select(taxon, decade_change,n, decade_change_lwr, decade_change_uppr)) |> 
  bind_rows(extra_data |> select(taxon, decade_change,n, decade_change_lwr, decade_change_uppr)) |> 
  mutate(decade_change_round = dec_places(decade_change), 
         decade_change_lwr_round = dec_places(decade_change_lwr),
         decade_change_uppr_round = dec_places(decade_change_uppr),
         perc_change = (decade_change-1)*100) |> 
  rename(n_species = n) |> 
  write_csv("output/data/decade_change_values.csv")

Mutivariate p-values

gls_output |> 
  mutate(sig = case_when(p_value <= 0.001 ~ "p<0.001 ***",
                         p_value <= 0.01  ~ "p<0.01 **",
                         p_value <= 0.05  ~ "p<0.05*",
                         TRUE ~ "")) |>
  mutate(direction = case_when(value == 0 & p_value <= 0.05 ~ "no change",
                               value > 0 & p_value <= 0.05~ "increase",
                               value < 0 & p_value <= 0.05~ "decrease")) |> 
  # filter(p_value <= 0.055) |> # also show marginally significant
  left_join(label_tbl) |> 
  select(Biogeograpghy = biogeog, 
         Taxon = taxon, 
         Term = axis_title, 
         p_value, 
         Significance = sig, 
         Direction = direction) |> 
  write_csv("output/data/mutlivariate_output_values.csv")

Decadal change values

# values from fig01 in the paper
fig04_data_separated |> 
  filter(!is.na(biogeog)) |> 
  bind_rows(fig04_data_allspp) |> 
  bind_rows(extra_data) |> 
  select(taxon, biogeog, starts_with("decade")) |> 
  mutate(perc_change = (decade_change-1)*100) |> 
  mutate(across(where(is.numeric), ~round(.x, 2)))|> 
  kable() |> 
  kable_styling("striped", full_width = F, position = "center") 
taxon biogeog decade_change decade_change_lwr decade_change_uppr perc_change
Coral Tropical 1.03 0.92 1.14 2.51
Coral Warm 0.79 0.54 1.15 -20.87
Mobile invertebrates Cool 0.82 0.72 0.93 -18.23
Mobile invertebrates Tropical 1.36 1.13 1.64 35.92
Mobile invertebrates Warm 0.97 0.78 1.20 -2.83
Macroalgae Cool 0.89 0.80 0.99 -10.70
Macroalgae Warm 1.16 1.02 1.31 15.71
Vertebrates Cool 0.81 0.71 0.93 -18.73
Vertebrates Tropical 0.85 0.81 0.89 -15.14
Vertebrates Warm 0.93 0.85 1.02 -7.12
All species NA 0.90 0.87 0.94 -9.54
A_myriophthalma NA 0.81 NA NA -18.64
all_corals NA 1.02 0.92 1.13 1.55
all_macroalgae NA 0.96 0.89 1.05 -3.52
cool_echinoderms cool 0.80 0.69 0.93 -19.75
warm_echonoids warm 0.60 0.43 0.84 -39.79
data_clean |> 
  filter(species_name %in% sig_coral_output$species_name) |> 
  mutate(count_type = case_when(count_raw == 0 ~ "zero",
                                count_raw > 0 ~ "positive")) |> 
  count(species_name, count_type) |> 
  filter(!count_type |> is.na()) |> 
  add_count(species_name, 
            wt = n, 
            name = "total_n") |> 
  kable() |> 
  kable_styling("striped", full_width = F, position = "center") 
species_name count_type n total_n
Acropora austera positive 56 100
Acropora austera zero 44 100
Acropora loripes positive 77 154
Acropora loripes zero 77 154
Acropora valida positive 79 174
Acropora valida zero 95 174
Astreopora myriophthalma positive 107 224
Astreopora myriophthalma zero 117 224
Echinopora lamellosa positive 110 200
Echinopora lamellosa zero 90 200
final_output |> 
  filter(taxon == "Macroalgae", pval < 0.05) |> 
  ungroup() |> 
  count(direction)
## # A tibble: 2 × 2
##   direction     n
##   <chr>     <int>
## 1 down         14
## 2 up           11
final_output |> 
  filter(taxon == "Macroalgae") |> 
  pull(slope) |> 
  t.test()
## 
##  One Sample t-test
## 
## data:  pull(filter(final_output, taxon == "Macroalgae"), slope)
## t = -0.68944, df = 133, p-value = 0.4917
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.013854772  0.006692714
## sample estimates:
##    mean of x 
## -0.003581029

Number of sites surveyed

taxon_cols2 <-
  c(
    Vertebrates = "#ffd600ff",
    Macroalgae = "#006500ff",
    Coral = "#ff7f4eff",
    "Mobile Invertebrates" = "#a01feeff"
  )

sitecode_pre2008 <- 
  data_clean |> 
  filter(!is.na(count_raw), 
         year < 2008) |> 
  pull(site_code) |> 
  unique()

extended_fig02 <- 
  
  data_clean |> 
  filter(!is.na(count_raw), 
         year >= 2008) |> 
  select(site_code, taxon, year) |> 
  distinct() |> 
  count(taxon, site_code, name = "n_annual_survs") |> 
  mutate(pre2008 = as.numeric(site_code %in% sitecode_pre2008)) |> 
  mutate(n_annual_survs_edit = ifelse(n_annual_survs == 1, n_annual_survs + pre2008, n_annual_survs)) |> 
  arrange(desc(n_annual_survs_edit)) |> 
  count(taxon, n_annual_survs_edit, name = "n_sites") |> 
  mutate(taxon = factor(taxon, 
                        levels = c("Vertebrate", 
                                   "Invertebrate",
                                   "Macroalgae",
                                   "Coral"),
                        labels = c("Vertebrates",
                                   "Mobile Invertebrates",
                                   "Macroalgae",
                                   "Coral")
  )) |> 
  ggplot(aes(x = n_annual_survs_edit, 
             y = n_sites, 
             fill = taxon)) +
  geom_col(width = 0.6, 
           position = position_dodge(width = 0.6, preserve = "single"), 
           colour = "black") +
  scale_fill_manual("taxon", 
                    values = taxon_cols2) +
  scale_x_continuous(breaks = pretty_breaks(), label = label_number(), expand = c(.01,.01)) +
  scale_y_continuous(breaks = pretty_breaks(), label = label_number(), expand = c(.01,.01)) +
  labs(x = "Number of annual surveys",
       y = "Number of sites") +
  theme_classic(20) +
  theme(
    # axis.text = element_text(size = 12),
    legend.position = c(0.95,0.95),
    legend.justification = c(1,1),
    legend.title = element_blank(),
    axis.text.y.right = element_text(colour = "black", 
                                     hjust = 0.95),
    axis.line = element_line(size = 1),
    axis.ticks = element_line(size = 1),
    plot.margin = margin(r = 10, b = 5,  t = 5),
    legend.margin = margin(t=0,l=2,b=2,r=2, unit='mm'),
    legend.background = element_rect(size = 0.5))

save_plot(filename = here("output", "figs", "extended_fig02.png"), 
          plot = extended_fig02, 
          base_height = 7)
here("output", "figs", "extended_fig02.png") |> knitr::include_graphics()

Sensitivity analysis

plot_sensitivity <- function(nsurvs, remove_x_axis = FALSE){
 
    site_code_tokeep <- 
    data_clean |> 
    filter(!is.na(count_raw), 
           year >= 2008) |> 
    select(site_code, taxon, year) |> 
    distinct() |> 
    count(taxon, site_code, name = "n_annual_survs") |> 
    mutate(pre2008 = as.numeric(site_code %in% sitecode_pre2008)) |> 
    mutate(n_annual_survs_edit = ifelse(n_annual_survs == 1, n_annual_survs + pre2008, n_annual_survs)) |> 
    filter(n_annual_survs_edit >= nsurvs)  |> 
    pull(site_code) |> 
    unique()
  
  total_site_codes <-
    data_clean |> 
    filter(!is.na(count_raw), 
           year >= 2008) |> 
    select(site_code, year) |> 
    distinct() |> 
    count(site_code) |> 
    pull(site_code) |> 
    unique()
  
  perc_transid <- (length(site_code_tokeep)*100/length(total_site_codes)) |> round(1)
  
  count_latlonmean <- 
    data_full |>
    filter(year >= 2011) |>
    left_join(transect_tbl) |> 
    left_join(species_tbl) |> 
    # mutate(site_id = paste(site_code, data, sep = "_")) |> 
    filter(site_code %in% site_code_tokeep) |> 
    group_by(latitude, longitude, taxon, species_name, year) |> 
    summarise(mean_count = mean(count_extrap, na.rm = T), 
              .groups = "drop")
  
  count_sppmean <- 
    count_latlonmean |>
    group_by(taxon, species_name, year) |> 
    summarise(mean_count = mean(mean_count, na.rm = T), 
              .groups = "drop")
  
  spp_min_meancount <- 
    count_sppmean |> 
    group_by(species_name) |> 
    filter(mean_count > 0) |> 
    summarise(min_meancount = min(mean_count, na.rm = TRUE))
  
  # Mean count within species
  count_sppmean_log <- 
    count_sppmean |> 
    left_join(spp_min_meancount) |> 
    mutate(log_meancount = case_when(mean_count == 0 ~ log(min_meancount/2),
                                     TRUE ~ log(mean_count)))
  
  slopes_byspp <-
    count_sppmean_log |> 
    group_by(taxon, species_name) |> 
    nest() |> 
    mutate(lm     = map(data,   ~lm(.x$log_meancount ~ .x$year)),
           slope  = map_dbl(lm, ~.x$coefficients[2]), 
           r2     = map_dbl(lm, ~summary(.x)$r.squared),
           adj_r2 = map_dbl(lm, ~summary(.x)$adj.r.squared), 
           n = map_int(data, nrow)) |> 
    select(-c(data, lm))
  
  
  # Mean slope per taxon per biogeography
  fig04_data_separated <-
    slopes_byspp |> 
    left_join(species_tbl) |> 
    group_by(taxon, biogeog) |> 
    summarise(y = mean(slope, na.rm = TRUE),
              sd = sd(slope, na.rm = TRUE), 
              n = n(),
              .groups = "drop") |> 
    mutate(ci = qnorm(.95)*(sd/sqrt(n))) |> 
    filter(n > 1) |> # remove single points 
    bind_rows(tibble(taxon = "All species", y = NA, biogeog = NA)) |> 
    mutate(biogeog = factor(biogeog, 
                            levels = c("tropical", "warm", "cool"),
                            labels = c("Tropical", "Warm", "Cool"))) |> 
    mutate(taxon = factor(taxon, 
                          levels = c("All species",
                                     "Coral",  
                                     "Macroalgae", 
                                     "Invertebrate",
                                     "Vertebrate"),
                          labels = c("All species",
                                     "Coral",  
                                     "Macroalgae", 
                                     "Mobile invertebrates",
                                     "Vertebrates")
    )) |> 
    mutate(uppr = y + ci,
           lwr = y - ci) |> 
    mutate(decade_change = exp(y*10),
           decade_change_lwr = exp(lwr*10),
           decade_change_uppr = exp(uppr*10))
  
  # Mean slope for all species combined
  fig04_data_allspp <-
    slopes_byspp |> 
    ungroup() |> 
    left_join(species_tbl) |> 
    summarise(y = mean(slope, na.rm = TRUE),
              sd = sd(slope, na.rm = TRUE), 
              n = n(),
              .groups = "drop") |> 
    mutate(ci = qnorm(.95)*(sd/sqrt(n))) |> 
    mutate(taxon = "All species")|> 
    mutate(uppr = y + ci,
           lwr = y - ci) |> 
    mutate(decade_change = exp(y*10),
           decade_change_lwr = exp(lwr*10),
           decade_change_uppr = exp(uppr*10))
  
  
  
  # make the downloaded SVGs silouhettes and assign to objects
  for(i in c("Coral", "Invertebrate", "Macroalgae", "Vertebrate")){
    
    img_tmp <- 
      magick::image_read_svg(
        here("input", "images", paste0(i, ".svg")), 
        width = 1000) |> 
      magick::image_quantize(colorspace = "gray") |> 
      magick::image_colorize(opacity = 70, color = "black") 
    
    assign(paste0(i, "_svg"), img_tmp)
    
  }
  
  fig04_raw <- 
    fig04_data_separated |> 
    ggplot() + 
    aes(x = fct_cross(taxon, biogeog),
        y = decade_change) +
    geom_rect(ymin = -Inf, 
              ymax = 1, 
              xmin = -Inf, 
              xmax = Inf, 
              fill = grey_bkgrnd_col,
              col = "transparent") +
    # species separated by biogeography and taxon
    geom_point(size = 3, aes(colour = biogeog)) +
    geom_errorbar(aes(ymin = decade_change_lwr, 
                      ymax = decade_change_uppr,
                      colour = biogeog), 
                  width = 0.2,
                  show.legend = FALSE) +
    geom_text(aes(y = decade_change_lwr - 0.01, label = n), 
              vjust = 1) +
    # all species combined
    geom_point(size = 3, 
               aes(x = taxon),
               data = fig04_data_allspp) +
    geom_errorbar(aes(ymin = decade_change_lwr, 
                      ymax = decade_change_uppr, 
                      x = taxon), 
                  width = 0.2,
                  show.legend = FALSE,
                  data = fig04_data_allspp) +
    geom_text(aes(x = taxon, 
                  y = decade_change_lwr - 0.01, 
                  label = n), 
              vjust = 1, 
              data = fig04_data_allspp) +
    scale_color_manual(values = biogeog_cols) +
    facet_grid(~taxon, 
               scales = "free_x", 
               space = "free",
               switch = "both",
               labeller = label_wrap_gen(width=10)) +
    scale_y_continuous(labels = label_number(suffix = "x", 
                                             accuracy = 0.1), 
                       breaks = seq(0.4, 1.8, 0.2)) +
    coord_cartesian(ylim = c(0.5, 1.7)) +
    labs(x = "",
         y = "Decadal change", 
         title = paste0("Sites with trends based on ", nsurvs, " or more surveys (", perc_transid, "% all sites)")) +
    theme_minimal() +
    theme(axis.title.x = element_blank(),
          panel.grid = element_blank(), 
          # legend.position = "none",
          # legend.position = c(0.95, 0.95),
          # legend.justification = c(1,1),
          legend.position = "none",
          legend.title = element_blank(),
          axis.text.y = element_text(size = 14),
          axis.text.x = element_blank(),
          axis.title.y = element_text(size = 18),
          axis.line = element_line(size = 0.7),
          axis.ticks = element_line(size = 0.7),
          strip.placement = "outside",
          strip.text = element_text(size = 12),
          # legend.margin = margin(t=-0,l=0.1,b=0.1,r=0.2, unit='cm'),
          # legend.background = element_rect(size = 0.5),
          panel.spacing = unit(0.1, "lines")) +
    {if(remove_x_axis) theme(strip.text = element_blank())}
  
  # adding the icons
  ggdraw() +
    draw_plot(fig04_raw) +
    draw_image(image = Coral_svg,
               x = 3.5/13, 
               y = 0.8, 
               scale = 0.15, 
               hjust = 0.5, 
               vjust = 0.5) +
    draw_image(image = Macroalgae_svg,
               x = 5.75/13, 
               y = 0.8, 
               scale = 0.2, 
               hjust = 0.5, 
               vjust = 0.5) +
    draw_image(image = Invertebrate_svg,
               x = 8.25/13, 
               y = 0.8, 
               scale = 0.15, 
               hjust = 0.5, 
               vjust = 0.5) +
    draw_image(image = Vertebrate_svg,
               x = 11.5/13, 
               y = 0.7, 
               scale = 0.15, 
               hjust = 0.5, 
               vjust = 0.5) 
}

for_legend_only <- 
  fig04_data_separated |> 
  ggplot() + 
  aes(x = fct_cross(taxon, biogeog),
      y = decade_change) +
  geom_point(size = 4, aes(colour = biogeog)) +
  scale_color_manual(values = biogeog_cols) +
  theme_minimal() +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        legend.margin = margin(t=-0,l=0.1,b=0.1,r=0.2, unit='cm'),
        legend.background = element_rect(size = 0.5)) 

legend_b <- 
  for_legend_only |> 
  get_legend() |> 
  as_ggplot()

# p1 <- plot_sensitivity(2)

p_sens <-
  plot_grid(
    plot_sensitivity(2, remove_x_axis = TRUE),
    plot_sensitivity(3, remove_x_axis = TRUE),
    plot_sensitivity(4), 
    rel_heights = c(1, 1, 1.1),
    ncol = 1, 
    labels = "AUTO"
  ) |> 
  plot_grid(legend_b, ncol = 1, rel_heights = c(1, .1))

save_plot(filename = here("output", "figs", "sensitivity_analysis.png"), 
          plot = p_sens, 
          base_height = 11, 
          base_asp = 0.6 )
here("output", "figs", "sensitivity_analysis.png") |> knitr::include_graphics()